#################################################
###                                           ###
###     Ultimate Reserve Risk Calculation     ###
###   by Alessandro Carrato and Luigi Lotti   ###
###       alessandro.carrato@gmail.com        ###
###                                           ###
#################################################


library(ChainLadder) #Load Chain Ladder package
library(tweedie) #for var.power fitting

####ONLY FOR DEBUG####
########START#########

#Load toList function (to convert data.set in matrix ... to be improved?
toList <- function(data) {
  matr<-cbind(data[,1],data[,2])
  for(i in 3:length(data[1,])){
    matr<-cbind(matr,data[,i])
  }
  matr
}

####ONLY FOR DEBUG####
#########END##########

fit_gamma <- function(coeffs,design.type,n){
  pos.gamma<-2 #intercept + next position = 2
  for (i in 1:2){ 
    if (design.type[i] == 1) {
      pos.gamma<-pos.gamma+n
    }
    if (design.type[i] == 2) {
      pos.gamma<-pos.gamma+1
    }
  }
  y<-coeffs[pos.gamma:length(coeffs)]
  y<-y[!is.na(y)]
  x<-seq(1,length(y))
  new <- data.frame(x = seq(1, 2*n))
  new$y<-predict(lm(y~x),new)
  gamma_y<-rep(0,2*n)
  for (k in 1:(2*n)){
    if (!is.na(coeffs[pos.gamma+k-1])){
      gamma_y[k]<-coeffs[pos.gamma+k-1]
    } else {
      gamma_y[k]<-new$y[k]
    }    
  }
  
  out<-c(list(coeffs=append(coeffs[1:(pos.gamma-1)],gamma_y),gamma_y=gamma_y))
  return(out)
}


stochasticReserving <- function(triangle, var.power=1, link.power=0, design.type=c(1,1,0), rereserving=FALSE,##link.power=0 is the log link ...
                                cum=TRUE, exposure=FALSE, bootstrap=1, boot.adj=0, nsim=1000, proc.err=TRUE, p.optim=F,...){
  
  call <- match.call()
  if (!("triangle") %in% class(triangle))
    stop("triangle must be of class 'triangle'")
  if ("offset" %in% names(list(...)))
    stop("'offset' should be passed using the 
         'exposure' attribute of the triangle!")
  if ("weigth" %in% names(list(...)))
    stop("'weight' should not be used")  
  # convert to incremental losses if needed
  # tr.incr <- cum2incr(triangleC)   ## giusto per far prima in debug!!
  tr.incr <- if (cum) cum2incr(triangle) else triangle
  
  # create family                            
  family <- tweedie(var.power=var.power,link.power=link.power)
  
  # convert to long format
  lda <-  as.data.frame(tr.incr)
  lda$offset <- if (is.null(attr(tr.incr,"exposure")))
    rep(0,nrow(lda)) else 
      family$linkfun(attr(tr.incr,"exposure")[lda$origin])
  
  #parameter fix for better intrepretation of results
  base.year=min(lda$origin)
  lda$origin<-lda$origin-base.year+1 # ORIGIN MUST START FROM 1
  lda$dev<-lda$dev-1 # DEVELOPMENT YEAR FROM 0
  lda$cy <- lda$origin + lda$dev
  
  ######################################
  ####DESIGN MATRIX CALCULATION#####
  ######################################
  
  
  #  design matrix per AY
  design.string<-" "
  m_dev=max(lda["dev"]) ##max_dev (IT MUST START from 0!!!)
  m_cy=2*m_dev
  m=nrow(lda)
  
  temp<-rep(1,m)
  
  if (design.type[1] == 0){
    ay<-matrix(0,nrow=m,ncol=1)
  }
  if (design.type[1] == 1){
    
    design.string=paste(design.string,"factor(origin)")
    
    ay<-matrix(0,nrow=m, ncol=m_dev)
    for(r in 1:m){
      if (lda$origin[r]!=1) {ay[r,lda$origin[r]-1]=1}
    }
    ay<-cbind(temp,ay)       
  }
  if (design.type[1] == 2){
    
    design.string=paste(design.string,"origin")
    
    ay<-matrix(0,nrow=m, ncol=1)
    for(r in 1:m){
      ay[r,1]=lda$origin[r]
    }
    ay<-cbind(temp,ay)
  }
  
  #  design matrix per DY
  if (design.type[2] == 0){
    dy<-matrix(0,nrow=m,ncol=1)
  }
  if (design.type[2] == 1){
    
    if (design.type[1]==0) {
      design.string=paste(design.string,"factor(dev)")
    } else {
      design.string=paste(design.string,"+ factor(dev)")
    }
    
    dy<-matrix(0,nrow=m, ncol=m_dev)
    for(r in 1:m){
      if (lda$dev[r]!=0) { dy[r,lda$dev[r]]=1 }
    }
    dy<-cbind(temp,dy)
    
  }
  if (design.type[2] == 2){
    
    if (design.type[1]==0) {
      design.string=paste(design.string,"dev") 
    } else {
      design.string=paste(design.string,"+ dev")
    }
    
    dy<-matrix(0,nrow=m, ncol=1)
    for(r in 1:m){
      dy[r,1]=lda$dev[r]
    }
    dy<-cbind(temp,dy)
  }
  
  #  design matrix per CY
  if (design.type[3] == 0){
    cy<-matrix(0,nrow=m,ncol=1)
  }
  if (design.type[3] == 1){
    
    if (design.type[1]==0 && design.type[2]==0) {
      design.string=paste(design.string,"factor(cy)")
    } else {
      design.string=paste(design.string,"+ factor(cy)")
    }
    
    cy<-matrix(0,nrow=m, ncol=2*m_dev)
    for(r in 1:m) {
      if (lda$cy[r]!=1) {cy[r,lda$cy[r]-1]=1}
    }
    cy<-cbind(temp,cy)
    
  }
  if (design.type[3] == 2){
    
    if (design.type[1]==0 && design.type[2]==0) {
      design.string=paste(design.string,"cy")
    } else {
      design.string=paste(design.string,"+ cy")
    }
    
    cy<-matrix(0,nrow=m, ncol=1)
    for(r in 1:m){
      cy[r,1]=lda$cy[r]
    }
    cy<-cbind(temp,cy)
    
  }
  
  design.matrix<-cbind(ay,dy,cy)
  design.matrix<-cbind(temp,subset(design.matrix, select=-c(1,length(ay[1,])+1,(length(ay[1,])+length(dy[1,])+1))))
  
  ######################################
  ####END DESIGN MATRIX CALCULATION#####
  ######################################
  
  
  ldaFit <- subset(lda,!is.na(lda$value)) 
  ldaOut <- subset(lda,is.na(lda$value))
  
  # fit the model
  #glmFit<-glm(value~factor(origin)+factor(dev),family=quasipoisson(log),data=ldaFit)
  
  if (exposure){
    glmstring<-paste(design.string,", family = family, data=ldaFit,offset=offset")
  } else {
    glmstring<-paste(design.string,", family = family, data=ldaFit")
  }
  
  eval(parse(text=
    c(
      "glmFit<-glm(value ~", glmstring,
      ")"
    )
  )
  )  
  
  ################################
  ## calculate reserve 
  ################################
  
  # prediction for each cell
  
  coeffs <- glmFit$coefficients
  temp_y<-data.frame(gamma_y<-c(0))
  
  # an extrapultion of future CY factors has to be made for the future CY
  if (design.type[3]==1) {
    temp_y<-fit_gamma(coeffs,design.type,n=m_dev) ## NOTE: 0 included!!! so actually we have (n+1) values!!
    coeffs<-temp_y$coeffs
  }
  n <- nrow(ldaFit) ## n° of data points (n)
  d.f<- df.residual(glmFit) ## n° of data points - parameters (n-p)
  bias <- sqrt(n/d.f)
  
  # dispersion
  #phi <- sum(resid(glmFit,type="pearson")^2)/d.f
  phi <- summary(glmFit)$dispersion
  
  lda$eta <- design.matrix %*% coeffs
  if(exposure) {lda$eta <- lda$eta + lda$offset}
  lda$yp <- exp(lda$eta)  
  
  # sum to get reserve by year
  
  resMeanAy <- tapply(lda$yp[is.na(lda$value)],ldaOut$origin, sum)
  resMeanTot <- sum(resMeanAy)       
  
  Reserve <- round(c(resMeanAy,resMeanTot))
  Latest <- getLatestCumulative(incr2cum(tr.incr))[-1L]
  Latest <- c(Latest,sum(Latest))
  Ultimate <- Latest + Reserve
  
  
  count.neg<-0
  ###BOOTSTRAP CYCLE###
  if (bootstrap!=0){
    pb <- winProgressBar(title = "progress bar", min = 1, max = nsim, width = 300)
    resMeanAyB <- matrix(0,length(resMeanAy),nsim)
    resMeanTotB <- rep(0,nsim)
    
    if (rereserving) { ##CREATE 1yr matrix
      resMeanAyB_1yr <- matrix(0,length(resMeanAy),nsim)
      resMeanTotB_1yr <- rep(0,nsim)
      
    }
    
    # loop nsim times
    for (b in 1:nsim){      
      mu <- lda$yp[!is.na(lda$value)]
      
      ### PARAMETRIC BOOTSTRAP ###
      if(bootstrap==1) {
        if(var.power!=0){ ### rtweedie doesn't support var.power = 0
          yB=rtweedie(n,phi=phi,xi=var.power,mu=mu) 
        } else{
          yB=rnorm(n,mean=mu,sd=sqrt(phi)) #### KNOWN ISSUE: Error in glm.fit with negative values .. why?
        }
      } else { ### SEMI-PARAMETRIC BOOTSTRAP ###      
        if(boot.adj==0){  ### WHILE CYCLE until triangle > 0 ... WARNING: COULD BE TIME CONSUMING!!!!
          ybad <- 1 
          tries=0
          while (ybad){
            tries = tries + 1
            if (tries>100) {stop("Too many negative bootstrapped values!!")} ### Added to exit code, otherwise "infinite" WHILE cycle could happen!!
            
            rn <- bias * sample(resid(glmFit, type = "pearson"), n, replace = TRUE)
            yB <- rn * sqrt(family$variance(mu)) + mu
            
            if (all(yB >= 0) || (!is.null(var.power) && var.power == 0)) #### KNOWN ISSUE: Error in glm.fit with negative values .. why?
              ybad <- 0  # Normal 
          }
          
        } else{ ### OVERWRITE NEGATIVE VALUES with 0.01 ... WARNING: COULD LEAD TO LOWER UNCERTAINTY!!
          rn <- bias * sample(resid(glmFit,type="pearson"), n, replace=TRUE) ##adjustment for df
          yB <- rn * sqrt(family$variance(mu)) + mu      
          if (!all(yB>=0) || (!is.null(var.power) && var.power != 0)) {
            for (i in 1:n){
              if (yB[i]<=0) {        
                yB[i]<-0.01
                count.neg<-count.neg+1
              }
            }
          }
        }
      }
      
      
      eval(parse(text=
        c(
          "glmFitB<-glm(yB~",glmstring,
          ")"
        )
      )
      )
      
      coeffsB <- glmFitB$coefficients
      
      if (design.type[3]==1) {
        temp_yB<-fit_gamma(coeffsB,design.type,n=m_dev) ## NOTE: DEV must start with 0!!!
        coeffsB<-temp_yB$coeffs
      }
      
      lda$etaB <- design.matrix %*% coeffsB
      if(exposure) {lda$etaB <- lda$etaB + lda$offset}
      lda$ypB <- exp(lda$etaB)
      
      ##ADD PROCESS ERROR
      if (proc.err){
        n.fut = length(lda$ypB[is.na(lda$value)])
        mu.fut= lda$ypB[is.na(lda$value)]
        if (var.power!=0){
          lda$ypB[is.na(lda$value)]=rtweedie(n.fut,xi=var.power,mu=mu.fut,phi=phi)  
        } else {
          lda$ypB[is.na(lda$value)]=rnorm(n.fut,mean=mu.fut,sd=sqrt(phi))
        }
      }
      
      
      resMeanAyB[,b] <- tapply(lda$ypB[is.na(lda$value)],ldaOut$origin, sum)
      resMeanTotB[b] <- sum(resMeanAyB[,b])
      
      ### 1yr VIEW Calculation ###
      if (rereserving) {
        
        lda$year1=lda$value  ## COPY ALL THE KNOWN VALUES
        next_y <- max(lda$origin)+1 ##NEXT DIAGONAL is for CY = max(origin)+1
        lda$year1[lda$cy==next_y]=lda$ypB[lda$cy==next_y]  ##COPY NEXT DIAGONAL
        ldaFit_1yr = subset(lda,!is.na(lda$year1))
        
        if (exposure){
          glmstring_1yr<-paste(design.string,", family = family, data=ldaFit_1yr,offset=offset")
        } else{
          glmstring_1yr<-paste(design.string,", family = family, data=ldaFit_1yr")
        }
        
        eval(parse(text=
          c(
            "glmFit1yr<-glm(year1~",glmstring_1yr,
            ")"
          )
        )
        )
        
        coeffs1yr <- glmFit1yr$coefficients
        
        if (design.type[3]==1) {
          temp_1yr<-fit_gamma(coeffs1yr,design.type,n=m_dev) ## NOTE: 0 included!!! so actually we have (n+1) values!!
          coeffs1yr<-temp_1yr$coeffs
        }
        
        lda$be1yr <- design.matrix %*% coeffs1yr ##ETA
        if(exposure) {lda$be1yr <- lda$be1yr + lda$offset}        ##ETA + OFFSET
        lda$be1yr <- exp(lda$be1yr)              ##Best Estimate 1yr, for all the values ..
        
        lda$year1[is.na(lda$year1)]=lda$be1yr[is.na(lda$year1)]
        
        resMeanAyB_1yr[,b] <- tapply(lda$year1[is.na(lda$value)],ldaOut$origin, sum)
        resMeanTotB_1yr[b] <- sum(resMeanAyB_1yr[,b])        
      }
      setWinProgressBar(pb, b, title=paste(round(b/nsim*100, 0),"% Done"))
      
    }
    close(pb)
    # compute estimation variance, adjusted by df 
    
    ##the adjustment for DF is included directly in residuals (see England(2002) Addendum) or in parametric bootstrap
    mseEstAy <- apply(resMeanAyB,1,var)
    mseEstTot <- var(resMeanTotB)
    
    avgResAy <- apply(resMeanAyB,1,mean)
    avgResTot <- mean(resMeanTotB)
    
    Expected.Reserve <- round(c(avgResAy,avgResTot))
    
    S.E <- sqrt(c(mseEstAy,mseEstTot))
    CoV <- S.E / Expected.Reserve
    
    if (rereserving) {
      mseEstAy_1yr <- apply(resMeanAyB_1yr,1,var)
      mseEstTot_1yr <- var(resMeanTotB_1yr)
      
      avgResAy_1yr <- apply(resMeanAyB_1yr,1,mean)
      avgResTot_1yr <- mean(resMeanTotB_1yr)
      
      Expected.Reserve_1yr <- round(c(avgResAy_1yr,avgResTot_1yr))
      
      S.E_1yr <- sqrt(c(mseEstAy_1yr,mseEstTot_1yr))
      CoV_1yr <- S.E_1yr / Expected.Reserve_1yr
      
      Emergence.Pattern <- S.E_1yr/S.E
    }
  }
  
  # percentage of negative values modified
  
  perc.neg<-count.neg/(nsim*n)
  
  
  #WRITING REPORT
  if (bootstrap!=0) {
    if (rereserving) {
      resDf <- data.frame(Latest=Latest, 
                          Expected.Reserve=Expected.Reserve,
                          Prediction.Error=S.E,
                          CoV=CoV,
                          Expected.Ultimate=Latest+Expected.Reserve,
                          GLMReserve=Reserve,
                          Expected.Reserve_1yr=Expected.Reserve_1yr,
                          Prediction.Error_1yr=S.E_1yr,
                          Emergence.Pattern = Emergence.Pattern)                     
    } else {
      resDf <- data.frame(Latest=Latest, 
                          Expected.Reserve=Expected.Reserve,
                          Prediction.Error=S.E,
                          CoV=CoV,
                          Expected.Ultimate=Latest+Expected.Reserve,
                          GLMReserve=Reserve,
                          Dev.To.Date=Latest/Ultimate)
    }
    
  } else {
    resDf <- data.frame(Latest=Latest, 
                        Reserve=Reserve,
                        Dev.To.Date=Latest/Ultimate,
                        Ultimate=Ultimate)
  }
  
  row.names(resDf) <- c(as.character(sort(unique(ldaOut$origin))),"total")
  
  # produce fully projected triangle
  ldaOut$value <- round(lda$yp[is.na(lda$value)])
  FullTriangle <- as.triangle(rbind(ldaFit,ldaOut))
  if (cum)
    FullTriangle <- incr2cum(FullTriangle)
  
  res.diag<-data.frame(unscaled=resid(glmFit,type="pearson"),
                       unscaled.biasadj=resid(glmFit,type="pearson")*bias,
                       scaled=resid(glmFit,type="pearson")/sqrt(phi),
                       scaled.biasadj=resid(glmFit,type="pearson")*bias/sqrt(phi),
                       dev=ldaFit$dev,
                       origin=ldaFit$origin,
                       cy=ldaFit$cy)
  
  # output
  out<-list(call=call,
            summary=resDf,
            Triangle=triangle,
            FullTriangle=FullTriangle,
            scale=phi,
            bias=bias,
            GLMReserve=resMeanTot,
            gamma_y=temp_y$gamma_y,
            res.diag=res.diag)
  
  if (bootstrap!=0) {
    if (rereserving) {
      out<-c(out,list(distr.res_ult=resMeanTotB,
                      distr.res_1yr=resMeanTotB_1yr
      ))
    } else {
      out<-c(out,list(distr.res_ult=resMeanTotB))  
    }
  }
  
  out<-c(out,glmFit[!(names(glmFit) %in% c("call"))])
  
  if (p.optim){
    #library(tweedie)
    p<-tweedie.profile(value~as.factor(dev)+cy,p.vec=c(seq(0, 2, by=0.1),3),data=ldaFit,offset=offset,do.plot=T)
    
    if (exposure){
      glmstring<-paste(design.string,", family = family, data=ldaFit,p.vec=c(seq(0, 2, by=0.1),3),do.plot=T,offset=offset")
    } else {
      glmstring<-paste(design.string,", family = family, data=ldaFit,p.vec=c(seq(0, 2, by=0.1),3),do.plot=T")
    }
    eval(parse(text=
      c(
        "p<-tweedie.profile(value ~", glmstring,
        ")"
      )
    )
    ) 
    
  }
  
  class(out) <- "glm"                     
  return(out)  
}


RC_report <- function(res,q=c(0.4,0.5,0.7,0.9,0.95,0.995,0.9993),RC.var=0.995){
  #if (class(res) != "stochasticReserving")
  #  stop("res must be of class 'stochasticReserving'")
  
  out<-list("Ultimate_View"=round(c(mean(res$distr.res_ult),sd(res$distr.res_ult)/mean(res$distr.res_ult),quantile(res$distr.res_ult,q)),4),
           "1yr_View"=round(c(mean(res$distr.res_1yr),sd(res$distr.res_1yr)/mean(res$distr.res_1yr),quantile(res$distr.res_1yr,q)),4),
           "RC_Ultimate"=round(quantile(res$distr.res_ult,RC.var)-mean(res$distr.res_ult),2),
           "RC_1yr"=round(quantile(res$distr.res_1yr,RC.var)-mean(res$distr.res_1yr),2),
           "Diagnostic"=round(c(res$GLMReserve,mean(res$distr.res_ult),mean(res$distr.res_1yr)),2))
  for (i in 1:2){
    attr(out[[i]],"names")[1]="mean"
    attr(out[[i]],"names")[2]="CoV"
  }
  names(out[[5]])=c("GLMReserve","mean(Ultimate View)","mean(1yr View)") 
  return(out)
}